import os
from docx import Document
import pandas as pd

import Document_AI

# Module-level registry of discovered files.
# File() fills it with {menu number (starting at 1): (file path, file type label)};
# main() prints it as a menu and looks up the user's selection in it.
File_list: dict = {}


def Read(url):
    """Return the paragraph text of a Word document as one string.

    The file at ``url`` is opened with python-docx's ``Document`` and the
    ``text`` of each item in ``doc.paragraphs`` is collected in order. Every
    paragraph, including the last one, is followed by a newline.

    NOTE(review): only ``doc.paragraphs`` is read; content held elsewhere
    (tables, headers, ...) is presumably not included -- confirm if needed.
    """
    word_doc = Document(url)
    # Gather the pieces first and join once instead of growing a string.
    pieces = [para.text + "\n" for para in word_doc.paragraphs]
    return "".join(pieces)


def Read_Excel(url):
    """Load an Excel file and return its contents rendered as plain text.

    The workbook is parsed by ``pandas.read_excel`` with its default options,
    and the resulting DataFrame is rendered with ``to_string(index=False)``,
    so the row index column is left out of the returned string.
    """
    return pd.read_excel(url).to_string(index=False)


def Read_Text(url):
    """Return the full contents of a UTF-8 encoded text file.

    The file at ``url`` is opened in text mode with ``encoding="utf-8"`` and
    read in a single call; the handle is closed when the ``with`` block exits.
    """
    with open(url, "r", encoding="utf-8") as text_file:
        return text_file.read()


def File(directory_path='分析文件'):
    """Scan a directory and register its entries in the module-level ``File_list``.

    After the call, ``File_list`` maps consecutive numbers starting at 1 to
    ``(file_path, file_type)`` tuples, where ``file_path`` is the entry name
    joined onto ``directory_path`` and ``file_type`` is the label returned by
    ``get_file_type``.

    Args:
        directory_path: Directory whose entries are listed. Defaults to
            ``'分析文件'``.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot be
            listed (for example ``FileNotFoundError`` if it does not exist).
    """
    # Drop results of any earlier scan; otherwise, if the directory now holds
    # fewer entries, the old higher-numbered keys would remain selectable.
    File_list.clear()

    # os.listdir() returns names in arbitrary order, so sort them to keep the
    # menu numbering stable from one run to the next.
    for number, name in enumerate(sorted(os.listdir(directory_path)), start=1):
        file_path = os.path.join(directory_path, name)
        File_list[number] = (file_path, get_file_type(file_path))


def get_file_type(url):
    """Classify a path by its file extension.

    The extension is taken from ``os.path.splitext`` and compared
    case-insensitively. Returns ``"Excel"`` for ``.xlsx``/``.xls``,
    ``"Word"`` for ``.docx``/``.doc``, ``"Text"`` for ``.txt`` and
    ``"未知类型"`` for anything else, including paths with no extension.
    """
    labels_by_extension = {
        ".xlsx": "Excel",
        ".xls": "Excel",
        ".docx": "Word",
        ".doc": "Word",
        ".txt": "Text",
    }
    _, extension = os.path.splitext(url)
    return labels_by_extension.get(extension.lower(), "未知类型")


def main():
    """Let the user pick a file from the scanned directory and analyse it.

    Steps:
      1. ``File()`` populates ``File_list``.
      2. Every entry is printed as ``number, path (type)``.
      3. The user enters a number; the matching file is read with ``Read``
         (Word), ``Read_Excel`` (Excel) or ``Read_Text`` (Text).
      4. The extracted text is passed to ``Document_AI.main``.

    If no files were found, the input is not a listed number, or the chosen
    file has an unsupported type, a message is printed and the function
    returns without calling ``Document_AI.main``.
    """
    File()
    if not File_list:
        # Nothing to choose from; prompting would only lead to a failed lookup.
        print("没有找到可分析的文档")
        return

    for number, (path, file_type) in File_list.items():
        print(f"{number},\t{path} ({file_type})")

    choice = input("请选择你要分析的文档:")
    try:
        # int() raises ValueError for non-numeric input; the dict lookup
        # raises KeyError for a number that is not in the menu.
        selected_file, selected_file_type = File_list[int(choice)]
    except (ValueError, KeyError):
        print(f"无效的选择: {choice}")
        return

    # Map each supported type label to the function that extracts its text.
    readers = {"Word": Read, "Excel": Read_Excel, "Text": Read_Text}
    reader = readers.get(selected_file_type)
    if reader is None:
        # Tell the user instead of silently doing nothing.
        print(f"不支持的文件类型: {selected_file}")
        return

    Document_AI.main(reader(selected_file))



